/*BEGIN_LEGAL 
Intel Open Source License 

Copyright (c) 2002-2011 Intel Corporation. All rights reserved.
 
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.  Redistributions
in binary form must reproduce the above copyright notice, this list of
conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.  Neither the name of
the Intel Corporation nor the names of its contributors may be used to
endorse or promote products derived from this software without
specific prior written permission.
 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE INTEL OR
ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
END_LEGAL */
#include <stdio.h>
// EXPORT_CSYM prefixes a function definition so that it gets C linkage.
// When TARGET_WINDOWS is defined the symbol is additionally marked
// __declspec(dllexport); "windows.h" is only included in that configuration.
#if defined(TARGET_WINDOWS)
#include "windows.h"
#define EXPORT_CSYM extern "C" __declspec( dllexport )
#else
#define EXPORT_CSYM extern "C" 
#endif

// Three doubles with C linkage, each starting at 2.0. main() resets them to
// 2.0 before every x87 sub-test and then checks whether they became 0.0 or 1.0.
// NOTE(review): presumably written by the external Fld1Fstp_* routines - confirm.
#if defined(__cplusplus)
extern "C" {
#endif

double var1 = 2.0;
double var2 = 2.0;
double var3 = 2.0;

#if defined(__cplusplus)
}
#endif


// Prototypes of the x87 test routines. All of them have C linkage and are
// defined outside this file; a single linkage block replaces the per-symbol
// guards.
#if defined(__cplusplus)
extern "C" {
#endif

void Fld1_x ();
void Fstp_x ();

// Variants that receive a callback pointer.
void Fld1Fstp_q (void (*pt2Function)());
void Fld1Fstp_n1 (void (*pt2Function)());

// Argument-less variants, one per sub-test in main().
void Fld1Fstp_a ();
void Fld1Fstp_b ();
void Fld1Fstp_c ();
void Fld1Fstp_d ();
void Fld1Fstp_e ();
void Fld1Fstp_f ();
void Fld1Fstp_g ();
void Fld1Fstp_h ();
void Fld1Fstp_i ();
void Fld1Fstp_j ();
void Fld1Fstp_k ();
void Fld1Fstp_l ();

// More callback-taking variants.
void Fld1Fstp_m (void (*pt2Function)());
void Fld1Fstp_n (void (*pt2Function)());

void Fld1FstpSetXmmsMaskMxcsr ();

#if defined(__cplusplus)
}
#endif


// Prototypes of the integer-scratch-register test routines and of the two
// MXCSR helpers. All have C linkage and are defined outside this file.
#if defined(__cplusplus)
extern "C" {
#endif

void IntScratchFld1Fstp_a();
void IntScratchFld1Fstp_b();
void IntScratchFld1Fstp_c();
void IntScratchFld1Fstp_d();

// main() calls these and then inspects bit 0x200 of GetMxcsr()'s result.
void MaskZeroDivideInMxcsr();
void UnMaskZeroDivideInMxcsr();

#if defined(__cplusplus)
}
#endif



// Fixed-width integer aliases and alignment qualifiers, selected per compiler.
// Both branches provide the same set of names (UINT8..UINT64, INT8..INT64,
// ALIGN16, ALIGN8), so later declarations such as xmm_reg_t do not rely on
// some other header happening to supply the signed aliases.
#if defined( __GNUC__)

#include <stdint.h>
typedef uint8_t  UINT8;   //LINUX HOSTS
typedef uint16_t UINT16;
typedef uint32_t UINT32;
typedef uint64_t UINT64;
typedef int8_t  INT8;
typedef int16_t INT16;
typedef int32_t INT32;
typedef int64_t INT64;

#define ALIGN16 __attribute__ ((aligned(16)))
#define ALIGN8  __attribute__ ((aligned(8)))

#elif defined(_MSC_VER)

typedef unsigned __int8 UINT8 ;
typedef unsigned __int16 UINT16;
typedef unsigned __int32 UINT32;
typedef unsigned __int64 UINT64;
// Signed counterparts, previously missing from this branch although
// xmm_reg_t uses them. The explicit `signed` keeps INT8 from becoming plain
// char.
// NOTE(review): intended to name the same types as the Windows SDK's
// INT8..INT64 so that a repeated typedef stays legal when "windows.h" is
// included - confirm against basetsd.h.
typedef signed __int8  INT8;
typedef signed __int16 INT16;
typedef signed __int32 INT32;
typedef signed __int64 INT64;


#define ALIGN16 __declspec(align(16))
#define ALIGN8  __declspec(align(8))

#else
#error Expect usage of either GNU or MS compiler.
#endif

// C-linkage routine defined outside this file. main() stores its result and
// tests bit 0x200, which its messages call the divide-by-zero exception mask.
// NOTE(review): presumably returns the current MXCSR contents - confirm
// against the assembly implementation.
#if defined(__cplusplus)
extern "C"
#endif
UINT64 GetMxcsr();

// Number of elements of each width that fit in one 16-byte XMM register.
// Every count is derived directly from the byte size.
#define MAX_BYTES_PER_XMM_REG   16
#define MAX_WORDS_PER_XMM_REG   (MAX_BYTES_PER_XMM_REG / 2)
#define MAX_DWORDS_PER_XMM_REG  (MAX_BYTES_PER_XMM_REG / 4)
#define MAX_QWORDS_PER_XMM_REG  (MAX_BYTES_PER_XMM_REG / 8)
#define MAX_FLOATS_PER_XMM_REG  (MAX_BYTES_PER_XMM_REG / sizeof(float))
#define MAX_DOUBLES_PER_XMM_REG (MAX_BYTES_PER_XMM_REG / sizeof(double))

extern "C" {

// 16-byte-aligned table of 32 qwords laid out as 16 pairs {k, 0} for
// k = 1..16, so each pair occupies 16 bytes.
// NOTE(review): presumably read as 128-bit operands by the external assembly
// routines - confirm.
ALIGN16 UINT64 vals[] =
{
     1, 0,   2, 0,   3, 0,   4, 0,
     5, 0,   6, 0,   7, 0,   8, 0,
     9, 0,  10, 0,  11, 0,  12, 0,
    13, 0,  14, 0,  15, 0,  16, 0
};

// valK points at the K-th pair, i.e. at element 2 * (K - 1) of vals.
UINT64* val1  = vals + 0;
UINT64* val2  = vals + 2;
UINT64* val3  = vals + 4;
UINT64* val4  = vals + 6;
UINT64* val5  = vals + 8;
UINT64* val6  = vals + 10;
UINT64* val7  = vals + 12;
UINT64* val8  = vals + 14;
UINT64* val9  = vals + 16;
UINT64* val10 = vals + 18;
UINT64* val11 = vals + 20;
UINT64* val12 = vals + 22;
UINT64* val13 = vals + 24;
UINT64* val14 = vals + 26;
UINT64* val15 = vals + 28;
UINT64* val16 = vals + 30;

} // extern "C"

// ADDRINT is UINT32 when TARGET_IA32 is defined and UINT64 otherwise.
// NOTE(review): reg_t further down defaults the other way (UINT32 unless
// TARGET_IA32E is defined), so a build defining neither macro gets mismatched
// widths - confirm the build always defines exactly one of them.
#if defined(TARGET_IA32)
typedef UINT32 ADDRINT;
#else
typedef UINT64 ADDRINT;
#endif

// C-linkage global initialised to 0. Nothing in the visible part of this file
// reads or writes it.
// NOTE(review): presumably accessed by the external assembly - confirm.
extern "C" ADDRINT setFlagsX=0;

// In-memory image of one XMM register. Every member overlays the same
// 16 bytes (MAX_BYTES_PER_XMM_REG); the union itself is declared ALIGN16.
union ALIGN16 xmm_reg_t
{
    // Unsigned integer views.
    UINT8  byte[MAX_BYTES_PER_XMM_REG];
    UINT16 word[MAX_WORDS_PER_XMM_REG];
    UINT32 dword[MAX_DWORDS_PER_XMM_REG];
    UINT64 qword[MAX_QWORDS_PER_XMM_REG];

    // Signed integer views.
    INT8   s_byte[MAX_BYTES_PER_XMM_REG];
    INT16  s_word[MAX_WORDS_PER_XMM_REG];
    INT32  s_dword[MAX_DWORDS_PER_XMM_REG];
    INT64  s_qword[MAX_QWORDS_PER_XMM_REG];

    // Floating-point views.
    float  flt[MAX_FLOATS_PER_XMM_REG];
    double dbl[MAX_DOUBLES_PER_XMM_REG];

};



// Per-target register configuration:
//   NUM_XMM_REGS          - 16 when TARGET_IA32E is defined, 8 otherwise.
//   NUM_XMM_SCRATCH_REGS  - how many XMM registers main() writes and checks:
//                           16 (IA32E + GNU compiler), 5 (IA32E + other
//                           compiler), 8 (non-IA32E).
//   reg_t                 - UINT64 on IA32E, UINT32 otherwise.
// NOTE(review): the value 5 presumably reflects which XMM registers are
// caller-saved under the non-GNU (Windows x64) calling convention - confirm.
#ifdef TARGET_IA32E
#define NUM_XMM_REGS 16
#if defined( __GNUC__)
#define NUM_XMM_SCRATCH_REGS 16
#else
#define NUM_XMM_SCRATCH_REGS 5
#endif
typedef UINT64 reg_t;
#else
#define NUM_XMM_REGS 8
#define NUM_XMM_SCRATCH_REGS 8
typedef UINT32 reg_t;
#endif


// Accessors for three general-purpose registers, implemented outside this
// file with C linkage. main() always reads "gax" first (see its
// "must be first" comments).
extern "C" {

reg_t get_gax();
reg_t get_gcx();
reg_t get_gdx();

void set_gax(reg_t regVal);
void set_gcx(reg_t regVal);
void set_gdx(reg_t regVal);

} // extern "C"
#if defined(__GNUC__)
#include <stdint.h>
// Stores the current contents of xmm0 into xmm_reg using an unaligned
// 128-bit move. The body is the same as get_xmm_reg0 below; no caller
// appears in the visible part of this file.
static void get_xmm_regA(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm0,%0" : "=m" (xmm_reg)  );
}

// Inline-asm accessors for xmm0..xmm7 (GNU compilers only).
//
//   set_xmm_regN(x): loads the 16 bytes of x into xmmN with movdqu and lists
//                    xmmN as clobbered so the compiler knows it was changed.
//   get_xmm_regN(x): stores xmmN into x with movdqu; x is a memory output.
//
// The register is named directly in the template rather than passed as an
// operand, and `volatile` keeps the compiler from discarding the statement.

// --- xmm0 ---
static void set_xmm_reg0(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm0" :  : "m" (xmm_reg) : "%xmm0"  );
} 

static void get_xmm_reg0(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm0,%0" : "=m" (xmm_reg)  );
}


// --- xmm1 ---
static void set_xmm_reg1(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm1" :  : "m" (xmm_reg) : "%xmm1"  );
} 

static void get_xmm_reg1(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm1, %0" : "=m" (xmm_reg)  );
}



// --- xmm2 ---
static void set_xmm_reg2(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm2" :  : "m" (xmm_reg) : "%xmm2"  );
} 

static void get_xmm_reg2(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm2,%0" : "=m" (xmm_reg)  );
}

// --- xmm3 ---
static void set_xmm_reg3(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm3" :  : "m" (xmm_reg) : "%xmm3"  );
} 

static void get_xmm_reg3(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm3,%0" : "=m" (xmm_reg)  );
}

// --- xmm4 ---
static void set_xmm_reg4(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm4" :  : "m" (xmm_reg) : "%xmm4"  );
} 

static void get_xmm_reg4(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm4,%0" : "=m" (xmm_reg)  );
}

// --- xmm5 ---
static void set_xmm_reg5(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm5" :  : "m" (xmm_reg) : "%xmm5"  );
} 

static void get_xmm_reg5(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm5,%0" : "=m" (xmm_reg)  );
}

// --- xmm6 ---
static void set_xmm_reg6(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm6" :  : "m" (xmm_reg) : "%xmm6"  );
} 

static void get_xmm_reg6(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm6,%0" : "=m" (xmm_reg)  );
}

// --- xmm7 ---
static void set_xmm_reg7(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm7" :  : "m" (xmm_reg) : "%xmm7"  );
} 

static void get_xmm_reg7(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm7,%0" : "=m" (xmm_reg)  );
}
#ifdef TARGET_IA32E
// Inline-asm accessors for xmm8..xmm15; this group is only compiled when
// TARGET_IA32E is defined. The getters come first, then the setters.

// Getters: store xmmN into xmm_reg (memory output) with an unaligned move.
static void get_xmm_reg8(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm8,%0" : "=m" (xmm_reg)  );
}


static void get_xmm_reg9(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm9,%0" : "=m" (xmm_reg)  );
}

static void get_xmm_reg10(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm10,%0" : "=m" (xmm_reg)  );
}

static void get_xmm_reg11(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm11,%0" : "=m" (xmm_reg)  );
}

static void get_xmm_reg12(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm12,%0" : "=m" (xmm_reg)  );
}

static void get_xmm_reg13(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm13,%0" : "=m" (xmm_reg)  );
}

static void get_xmm_reg14(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm14,%0" : "=m" (xmm_reg)  );
}

static void get_xmm_reg15(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %%xmm15,%0" : "=m" (xmm_reg)  );
}

// Setters: load xmm_reg (memory input) into xmmN and list xmmN as clobbered.
static void set_xmm_reg8(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm8" :  : "m" (xmm_reg) : "%xmm8"  );
}

static void set_xmm_reg9(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm9" :  : "m" (xmm_reg) : "%xmm9"  );
}

static void set_xmm_reg10(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm10" :  : "m" (xmm_reg) : "%xmm10"  );
}

static void set_xmm_reg11(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm11" :  : "m" (xmm_reg) : "%xmm11"  );
}

static void set_xmm_reg12(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm12" :  : "m" (xmm_reg) : "%xmm12"  );
}

static void set_xmm_reg13(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm13" :  : "m" (xmm_reg) : "%xmm13"  );
}

static void set_xmm_reg14(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm14" :  : "m" (xmm_reg) : "%xmm14"  );
}

static void set_xmm_reg15(xmm_reg_t& xmm_reg)
{
    asm volatile("movdqu %0, %%xmm15" :  : "m" (xmm_reg) : "%xmm15"  );
}

// Readers for r12..r15, implemented outside this file with C linkage.
// They are declared only for GNU compilers on IA32E.
extern "C" {
reg_t get_r12();
reg_t get_r13();
reg_t get_r14();
reg_t get_r15();
}
#endif

#else
// Non-GNU compilers: the per-register XMM accessors are not written as inline
// asm here; they are supplied externally with C linkage and the same
// signatures as the GNU versions.
extern "C" {

void set_xmm_reg0(xmm_reg_t& xmm_reg);
void get_xmm_reg0(xmm_reg_t& xmm_reg);
void set_xmm_reg1(xmm_reg_t& xmm_reg);
void get_xmm_reg1(xmm_reg_t& xmm_reg);
void set_xmm_reg2(xmm_reg_t& xmm_reg);
void get_xmm_reg2(xmm_reg_t& xmm_reg);
void set_xmm_reg3(xmm_reg_t& xmm_reg);
void get_xmm_reg3(xmm_reg_t& xmm_reg);
void set_xmm_reg4(xmm_reg_t& xmm_reg);
void get_xmm_reg4(xmm_reg_t& xmm_reg);
void set_xmm_reg5(xmm_reg_t& xmm_reg);
void get_xmm_reg5(xmm_reg_t& xmm_reg);
void set_xmm_reg6(xmm_reg_t& xmm_reg);
void get_xmm_reg6(xmm_reg_t& xmm_reg);
void set_xmm_reg7(xmm_reg_t& xmm_reg);
void get_xmm_reg7(xmm_reg_t& xmm_reg);

// xmm8..xmm15 are declared only for IA32E.
#ifdef TARGET_IA32E
void set_xmm_reg8(xmm_reg_t& xmm_reg);
void get_xmm_reg8(xmm_reg_t& xmm_reg);
void set_xmm_reg9(xmm_reg_t& xmm_reg);
void get_xmm_reg9(xmm_reg_t& xmm_reg);
void set_xmm_reg10(xmm_reg_t& xmm_reg);
void get_xmm_reg10(xmm_reg_t& xmm_reg);
void set_xmm_reg11(xmm_reg_t& xmm_reg);
void get_xmm_reg11(xmm_reg_t& xmm_reg);
void set_xmm_reg12(xmm_reg_t& xmm_reg);
void get_xmm_reg12(xmm_reg_t& xmm_reg);
void set_xmm_reg13(xmm_reg_t& xmm_reg);
void get_xmm_reg13(xmm_reg_t& xmm_reg);
void set_xmm_reg14(xmm_reg_t& xmm_reg);
void get_xmm_reg14(xmm_reg_t& xmm_reg);
void set_xmm_reg15(xmm_reg_t& xmm_reg);
void get_xmm_reg15(xmm_reg_t& xmm_reg);
#endif

} // extern "C"
#endif
// IA32E only: accessors for gdi, gsi and r8..r11, implemented outside this
// file with C linkage.
#ifdef TARGET_IA32E
extern "C" {

reg_t get_gdi();
reg_t get_gsi();
reg_t get_r8();
reg_t get_r9();
reg_t get_r10();
reg_t get_r11();

void set_gdi(reg_t regVal);
void set_gsi(reg_t regVal);
void set_r8(reg_t regVal);
void set_r9(reg_t regVal);
void set_r10(reg_t regVal);
void set_r11(reg_t regVal);

} // extern "C"
#endif



// Loads xmm_reg into the XMM register selected by reg_no by forwarding to the
// matching set_xmm_regN helper.
//
//   xmm_reg - value to load.
//   reg_no  - register index: 0..7, or 0..15 when TARGET_IA32E is defined.
//
// Any other index is ignored and no register is written.
static void
set_xmm_reg(xmm_reg_t& xmm_reg, UINT32 reg_no)
{
    switch (reg_no)
    {
    case 0:  set_xmm_reg0(xmm_reg);  return;
    case 1:  set_xmm_reg1(xmm_reg);  return;
    case 2:  set_xmm_reg2(xmm_reg);  return;
    case 3:  set_xmm_reg3(xmm_reg);  return;
    case 4:  set_xmm_reg4(xmm_reg);  return;
    case 5:  set_xmm_reg5(xmm_reg);  return;
    case 6:  set_xmm_reg6(xmm_reg);  return;
    case 7:  set_xmm_reg7(xmm_reg);  return;
#ifdef TARGET_IA32E
    case 8:  set_xmm_reg8(xmm_reg);  return;
    case 9:  set_xmm_reg9(xmm_reg);  return;
    case 10: set_xmm_reg10(xmm_reg); return;
    case 11: set_xmm_reg11(xmm_reg); return;
    case 12: set_xmm_reg12(xmm_reg); return;
    case 13: set_xmm_reg13(xmm_reg); return;
    case 14: set_xmm_reg14(xmm_reg); return;
    case 15: set_xmm_reg15(xmm_reg); return;
#endif
    default:
        return;   // unsupported index: nothing to do
    }
}
// Copies the XMM register selected by reg_no into xmm_reg by forwarding to
// the matching get_xmm_regN helper.
//
//   xmm_reg - receives the register contents.
//   reg_no  - register index: 0..7, or 0..15 when TARGET_IA32E is defined.
//
// For any other index xmm_reg is left untouched.
static void
get_xmm_reg(xmm_reg_t& xmm_reg, UINT32 reg_no)
{
    switch (reg_no)
    {
    case 0:  get_xmm_reg0(xmm_reg);  return;
    case 1:  get_xmm_reg1(xmm_reg);  return;
    case 2:  get_xmm_reg2(xmm_reg);  return;
    case 3:  get_xmm_reg3(xmm_reg);  return;
    case 4:  get_xmm_reg4(xmm_reg);  return;
    case 5:  get_xmm_reg5(xmm_reg);  return;
    case 6:  get_xmm_reg6(xmm_reg);  return;
    case 7:  get_xmm_reg7(xmm_reg);  return;
#ifdef TARGET_IA32E
    case 8:  get_xmm_reg8(xmm_reg);  return;
    case 9:  get_xmm_reg9(xmm_reg);  return;
    case 10: get_xmm_reg10(xmm_reg); return;
    case 11: get_xmm_reg11(xmm_reg); return;
    case 12: get_xmm_reg12(xmm_reg); return;
    case 13: get_xmm_reg13(xmm_reg); return;
    case 14: get_xmm_reg14(xmm_reg); return;
    case 15: get_xmm_reg15(xmm_reg); return;
#endif
    default:
        return;   // unsupported index: leave xmm_reg as it was
    }
}
// Fills XMM register reg_no with four copies of the 32-bit value val.
//
//   reg_no - register index, passed through to set_xmm_reg.
//   val    - pattern replicated into all four dwords of the register.
void write_xmm_reg(UINT32 reg_no, UINT32 val)
{
    xmm_reg_t pattern;

    // Same value in every dword lane.
    pattern.dword[0] = pattern.dword[1] = pattern.dword[2] = pattern.dword[3] = val;

    set_xmm_reg(pattern, reg_no);
}

// Reads XMM register reg_no into xmm.
//
//   reg_no - register index, passed through to get_xmm_reg.
//   xmm    - output; cleared first, so it holds all zeros if get_xmm_reg
//            does not handle reg_no.
void read_xmm_reg(UINT32 reg_no, xmm_reg_t& xmm)
{
    // Start from a known state before the register is copied in.
    xmm.dword[0] = xmm.dword[1] = xmm.dword[2] = xmm.dword[3] = 0;

    get_xmm_reg(xmm, reg_no);
}


// Six intentionally empty functions with C linkage (and dllexport on Windows
// targets, via EXPORT_CSYM). Nothing in the visible part of main() calls them.
// NOTE(review): the names suggest they exist only as named symbols for the
// test_iarg_preserve tool to replace or probe - confirm against the tool.
EXPORT_CSYM void TestIargPreserveInReplacement()
{
}

EXPORT_CSYM void TestIargPreserveInReplacement1()
{
}

EXPORT_CSYM void TestIargPreserveInReplacement2()
{
}

EXPORT_CSYM void  TestIargPreserveInProbed()
{
}

EXPORT_CSYM void  TestIargPreserveInProbed1()
{
}

EXPORT_CSYM void  TestIargPreserveInProbed2()
{
}

int main()
{

    var1=2.0;
    var2=2.0;
    var3=2.0;

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the first 3 x87 regs to 0
    Fld1Fstp_a();

    // make sure the values were changed by the tool test_iarg_preserve - since that tool
    // specified that its analysis routine preserves the x87 regs when it doesn't
    if (var1 != 0.0 || var2 != 0.0 || var3 != 0.0)
    {
        printf ("***Error in x87 values_1 var1 %f var2 %f var3 %f\n", var1, var2, var3);
       return (-1);
    }

    var1=2.0;
    var2=2.0;
    var3=2.0;

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the first 3 x87 regs to 0
    Fld1Fstp_b();

    
    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // specified that it's analysis routine does not preserve the x87 regs
    if (var1 != 1.0 || var2 != 1.0 || var3 != 1.0)
    {
        printf ("***Error in x87 values_2 var1 %f var2 %f var3 %f\n", var1, var2, var3 );
       return (-1);
    }
    
    var1=2.0;
    var2=2.0;
    var3=2.0;

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the first 3 x87 regs to 0
    Fld1Fstp_c();
    
    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // did not specify any IARG_PRESERVE and Pin saves the x87 state by default 
    if (var1 != 1.0 || var2 != 1.0 || var3 != 1.0)
    {
        printf ("***Error in x87 values_3\n");
       return (-1);
    }
    

    var1=2.0;
    var2=2.0;
    var3=2.0;

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the first 3 x87 regs to 0
    Fld1Fstp_d();

    
    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // used IARG_PRESERVE to specify that no regs are preserved, and Pin saves the x87 state by default
    if (var1 != 1.0 || var2 != 1.0 || var3 != 1.0)
    {
        printf ("***Error in x87 values_4\n");
       return (-1);
    }

    
    xmm_reg_t xmm_regs[NUM_XMM_SCRATCH_REGS];
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
       xmm_regs[i].dword[0] = xmm_regs[i].dword[1] = xmm_regs[i].dword[2] = xmm_regs[i].dword[3] = 0;
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xdeadbeef);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0xdeadbeef && xmm_regs[i].dword[1]==0xdeadbeef &&
            xmm_regs[i].dword[2]==0xdeadbeef && xmm_regs[i].dword[3]==0xdeadbeef))
        {
            printf ("***Error in initializing xmm regs\n");
            return (-1);
        }
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xdeadbeef);
    }

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the xmmregs to 0
    Fld1Fstp_e();

    // make sure the values were changed by the tool test_iarg_preserve - since that tool
    // specified that it's analysis routine preserved the xmm regs when it doesn't
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0 && xmm_regs[i].dword[1]==0 &&
            xmm_regs[i].dword[2]==0 && xmm_regs[i].dword[3]==0))
        {
            printf ("***Error in xmm regs values_1 xmm%d\n", i);
           return (-1);
        }
    }

    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xdeadbeef);
    }

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the xmmregs to 0
    Fld1Fstp_f();

    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // specified that it's analysis routine does not preserve the x87 regs
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0xdeadbeef && xmm_regs[i].dword[1]==0xdeadbeef &&
            xmm_regs[i].dword[2]==0xdeadbeef && xmm_regs[i].dword[3]==0xdeadbeef))
        {
            printf ("***Error in xmm regs values_2 xmm%d\n", i);
            return (-1);
        }
    }

    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xdeadbeef);
    }

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the xmmregs to 0
    Fld1Fstp_g();

    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // did not specify any IARG_PRESERVE and Pin saves the xmm regs
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0xdeadbeef && xmm_regs[i].dword[1]==0xdeadbeef &&
            xmm_regs[i].dword[2]==0xdeadbeef && xmm_regs[i].dword[3]==0xdeadbeef))
        {
            printf ("***Error in xmm regs values_3 xmm%d\n", i);
            return (-1);
        }
    }


    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xdeadbeef);
    }

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the xmmregs to 0
    Fld1Fstp_h();

    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // did not specify any IARG_PRESERVE and Pin saves the xmm regs
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0xdeadbeef && xmm_regs[i].dword[1]==0xdeadbeef &&
            xmm_regs[i].dword[2]==0xdeadbeef && xmm_regs[i].dword[3]==0xdeadbeef))
        {
            printf ("***Error in xmm regs values_4 xmm%d\n", i);
            return (-1);
        }
    }

    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xdeadbeef);
    }

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that sets the xmmregs to 0
    Fld1Fstp_i();

    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // did not specify any IARG_PRESERVE 
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0xdeadbeef && xmm_regs[i].dword[1]==0xdeadbeef &&
            xmm_regs[i].dword[2]==0xdeadbeef && xmm_regs[i].dword[3]==0xdeadbeef))
        {
            printf ("***Error in xmm regs values_5 xmm%d\n", i);
            return (-1);
        }
    }

    reg_t regGax, regGcx, regGdx, regGsi, regGdi, regR8, regR9, regR10, regR11;
    reg_t savedGax, savedGcx, savedGdx, savedGsi, savedGdi, savedR8, savedR9, savedR10, savedR11;

#if 0
    // this test not run - because overwriting the integer registers can cause Pin to fail
    savedGax = get_gax(); // must be first
    savedGcx = get_gcx();
    savedGdx = get_gdx();
#ifdef TARGET_IA32E 
    savedR8 = get_r8();
    savedR9 = get_r9();
    savedR10 = get_r10();
    savedR11 = get_r11();
#if defined( __GNUC__)
    savedGdi = get_gdi();
    savedGsi = get_gsi();
#endif
#endif

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in IntScratchFld1Fstp_a that sets the scratch registers to 0
    IntScratchFld1Fstp_a();


    // make sure values were changed by the tool test_iarg_preserve - since that tool
    // specified that it's analysis routine preserved the integer scratch regs when it doesn't
    // NOTE - due to Pin's register allocation, it may be that some of the scratch registers
    // are NOT changed - because they were in the spill area, this is OK. We assume that at 
    // least one of the scratch registers will not be in the spill area
    regGax = get_gax(); // must be first
    regGcx = get_gcx();
    regGdx = get_gdx();
    
#ifdef TARGET_IA32E 
    regR8 = get_r8();
    regR9 = get_r9();
    regR10 = get_r10();
    regR11 = get_r11();
#if defined( __GNUC__)
    regGdi = get_gdi();
    regGsi = get_gsi();
#endif
#endif

    // reset the values to what they were
    set_gax(savedGax); 
#if defined( __GNUC__)
    set_gcx(savedGcx);
#endif
    set_gdx(savedGdx);
#ifdef TARGET_IA32E 
    set_r8(savedR8);
    set_r9(savedR9);
    set_r10(savedR10);
    set_r11(savedR11);
#if defined( __GNUC__)
    set_gdi(savedGdi);
    set_gsi(savedGsi);
#endif
#endif
#if !defined( __GNUC__)
    set_gcx(savedGcx);
#endif

    if (!(regGax==0 || regGcx==0 || regGdx==0 
#ifdef TARGET_IA32E
           || regR8==0 || regR9==0 || regR10==0 || regR11==0
#if defined( __GNUC__)
          || regGdi==0 || regGsi==0
#endif
#endif
       ))
    {
        printf ("***Error in int scratch regs values\n");
        printf ("***Even though test will fail on this, it may not be a true failure - see the above comment in the test\n");
        return (-1);
    }

#endif

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in IntScratchFld1Fstp_a that sets the scratch registers to 0
    IntScratchFld1Fstp_b();

    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // specified that it's analysis routine does not preserve the integer scratch regs
    regGax = get_gax(); // must be first
    regGcx = get_gcx(); 
    regGdx = get_gdx(); 
    if (!(regGax==0xdeadbeef && regGcx==0xdeadbeef && regGdx==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_2\n");
        return (-1);
    }
#ifdef TARGET_IA32E 
    regR8 = get_r8();
    regR9 = get_r9();
    regR10 = get_r10();
    regR11 = get_r11();
    if (!(regR8==0xdeadbeef && regR9==0xdeadbeef && regR10==0xdeadbeef && regR11==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_2_ia32e regR8 %p regR9 %p regR10 %p regR11 %p\n",
                reinterpret_cast<void *>(regR8), reinterpret_cast<void *>(regR9), 
                reinterpret_cast<void *>(regR10), reinterpret_cast<void *>(regR11));
        return (-1);
    }
#if defined( __GNUC__)
    regGdi = get_gdi();
    regGsi = get_gsi();
    if (!(regGdi==0xdeadbeef && regGsi==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_2_ia32e linux\n");
        return (-1);
    }
#endif
#endif

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in IntScratchFld1Fstp_a that sets the scratch registers to 0
    IntScratchFld1Fstp_c();

    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // did not specify any IARG_PRESERVE
    regGax = get_gax(); // must be first
    regGcx = get_gcx(); 
    regGdx = get_gdx();
    if (!(regGax==0xdeadbeef && regGcx==0xdeadbeef && regGdx==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_3\n");
        return (-1);
    }
#ifdef TARGET_IA32E 
    regR8 = get_r8();
    regR9 = get_r9();
    regR10 = get_r10();
    regR11 = get_r11();
    if (!(regR8==0xdeadbeef && regR9==0xdeadbeef && regR10==0xdeadbeef && regR11==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_3_ia32e\n");
        return (-1);
    }
#if defined( __GNUC__)
    regGdi = get_gdi();
    regGsi = get_gsi();
    if (!(regGdi==0xdeadbeef && regGsi==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_3_ia32e linux\n");
        return (-1);
    }
#endif
#endif

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in IntScratchFld1Fstp_a that sets the scratch registers to 0
    IntScratchFld1Fstp_d();

    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // did not specify any IARG_PRESERVE
    regGax = get_gax(); // must be first
    regGcx = get_gcx(); 
    regGdx = get_gdx(); 
    
    if (!(regGax==0xdeadbeef && regGcx==0xdeadbeef && regGdx==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_4\n");
        return (-1);
    }
    
#ifdef TARGET_IA32E 
    regR8 = get_r8();
    regR9 = get_r9();
    regR10 = get_r10();
    regR11 = get_r11();
    if (!(regR8==0xdeadbeef && regR9==0xdeadbeef && regR10==0xdeadbeef && regR11==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_4_ia32e\n");
        return (-1);
    }
#if defined( __GNUC__)
    regGdi = get_gdi();
    regGsi = get_gsi();
    if (!(regGdi==0xdeadbeef && regGsi==0xdeadbeef))
    {
        printf ("***Error in int scratch regs values_4_ia32e linux\n");
        return (-1);
    }
#endif
#endif
    reg_t curMxcsr;
    UnMaskZeroDivideInMxcsr();
    curMxcsr = GetMxcsr();
    if ((curMxcsr & 0x200))
    {
        printf ("***Error0 in UNmasking the divide by zero exception mask in mxcsr\n");
        return (-1);
    }
    MaskZeroDivideInMxcsr();
    curMxcsr = GetMxcsr();
    if (!(curMxcsr & 0x200))
    {
        printf ("***Error1 in masking the divide by zero exception mask in mxcsr\n");
        return (-1);
    }
    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that unmasks the divide by zero exception mask in mxcsr
    Fld1Fstp_j();

    // make sure the divide by zero exception mask in mxcsr is not masked - since that tool
    // specified IARG_PRESERVE of the mxcsr mask when it really unmasked the divide by zero exception mask in mxcsr
    curMxcsr = GetMxcsr();
    if (curMxcsr & 0x200)
    {
        printf ("***Error expected the divide by zero exception mask in mxcsr to have been unmasked by the tool\n");
        return (-1);
    }

    MaskZeroDivideInMxcsr();
    curMxcsr = GetMxcsr();
    if (!(curMxcsr & 0x200))
    {
        printf ("***Error2 in masking the divide by zero exception mask in mxcsr\n");
        return (-1);
    }

    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that unmasks the divide by zero exception mask in mxcsr
    Fld1Fstp_k();

    // make sure the divide by zero exception mask in mxcsr is still masked - since the tool
    // did not specify any IARG_PRESERVE and Pin preserves the mxcsr by default
    curMxcsr = GetMxcsr();
    if (!(curMxcsr & 0x200))
    {
        printf ("***Error1 expected the divide by zero exception mask in mxcsr to remain unmasked around the analysis call in the tool\n");
        return (-1);
    }

    MaskZeroDivideInMxcsr();
    curMxcsr = GetMxcsr();
    if (!(curMxcsr & 0x200))
    {
        printf ("***Error3 in masking the divide by zero exception mask in mxcsr\n");
        return (-1);
    }

    
    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 in Fld1Fstp that unmasks the divide by zero exception mask in mxcsr
    Fld1Fstp_l();

    // make sure the divide by zero exception mask in mxcsr is still masked - since the tool
    // did not specify any IARG_PRESERVE on REG_MXCSR and Pin preserves the mxcsr by default
    curMxcsr = GetMxcsr();
    if (!(curMxcsr & 0x200))
    {
        printf ("***Error2 expected the divide by zero exception mask in mxcsr to remain unmasked around the analysis call in the tool\n");
        return (-1);
    }

    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xbaadf00d);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0xbaadf00d && xmm_regs[i].dword[1]==0xbaadf00d &&
            xmm_regs[i].dword[2]==0xbaadf00d && xmm_regs[i].dword[3]==0xbaadf00d))
        {
            printf ("***Error2 in initializing xmm regs\n");
            return (-1);
        }
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xbaadf00d);
    }

    UnMaskZeroDivideInMxcsr();
    curMxcsr = GetMxcsr();
    if ((curMxcsr & 0x200))
    {
        printf ("***Error4 in masking the divide by zero exception mask in mxcsr\n");
        return (-1);
    }
    var1=2.0;
    var2=2.0;
    var3=2.0;
    // the test_iarg_preserve tool will insert a call to an analysis routine  
    // after the third fld1 that sets the first 3 x87 regs to 0 
    
    Fld1FstpSetXmmsMaskMxcsr();
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // did not specify any IARG_PRESERVE and Pin saves the x87 state by default 
    if (var1 != 1.0 || var2 != 1.0 || var3 != 1.0)
    {
        printf ("***Error in x87 values_3\n");
       return (-1);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0 && xmm_regs[i].dword[1]==0 &&
            xmm_regs[i].dword[2]==0 && xmm_regs[i].dword[3]==0))
        {
            printf ("***Error xmm values after fxrstor i %d  %x %x %x %x\n", i, xmm_regs[i].dword[0], xmm_regs[i].dword[1],
                    xmm_regs[i].dword[2], xmm_regs[i].dword[3]);
            return (-1);
        }
    }
    // make sure the divide by zero exception mask in mxcsr is still masked
    curMxcsr = GetMxcsr();
    if (!(curMxcsr & 0x200))
    {
        printf ("***Error3 expected the divide by zero exception mask in mxcsr to remain unmasked around the analysis call in the tool\n");
        return (-1);
    }

    var1=2.0;
    var2=2.0;
    var3=2.0;

    Fld1Fstp_m((TestIargPreserveInReplacement)); // this function calls TestIargPreserveInReplacement, which is replaced by the tool 

    // make sure the values were changed by the tool test_iarg_preserve - since that tool
    // specified that its replacement routine preserves the x87 regs when it doesn't
    if (var1 != 0.0 || var2 != 0.0 || var3 != 0.0)
    {
        printf ("***Error in x87 values_1a var1 %f var2 %f var3 %f\n", var1, var2, var3);
        return (-1);
    }

    var1=2.0;
    var2=2.0;
    var3=2.0;

    Fld1Fstp_n((TestIargPreserveInReplacement1)); // this function calls TestIargPreserveInReplacement, which is replaced by the tool 

    // make sure the values were NOT changed by the tool test_iarg_preserve - since that tool
    // specified that its replacement routine does not preserve the x87 regs
    if (var1 != 1.0 || var2 != 1.0 || var3 != 1.0)
    {
        printf ("***Error in x87 values_1a var1 %f var2 %f var3 %f\n", var1, var2, var3);
        return (-1);
    }


     for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xbaadf00d);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0xbaadf00d && xmm_regs[i].dword[1]==0xbaadf00d &&
            xmm_regs[i].dword[2]==0xbaadf00d && xmm_regs[i].dword[3]==0xbaadf00d))
        {
            printf ("***Error2 in initializing xmm regs\n");
            return (-1);
        }
    }
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        write_xmm_reg(i, 0xbaadf00d);
    }

    Fld1_x();
    Fld1Fstp_n1((TestIargPreserveInReplacement2)); // this function calls TestIargPreserveInReplacement2, which is replaced by the tool 
    Fstp_x();

    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        read_xmm_reg(i, xmm_regs[i]);
    }
    
    for (UINT32 i=0; i<NUM_XMM_SCRATCH_REGS; i++)
    {
        if (!(xmm_regs[i].dword[0]==0xbaadf00d && xmm_regs[i].dword[1]==0xbaadf00d &&
            xmm_regs[i].dword[2]==0xbaadf00d && xmm_regs[i].dword[3]==0xbaadf00d))
        {
            printf ("***Error xmm values_1 after fxrstor i %d  %x %x %x %x\n", i, xmm_regs[i].dword[0], xmm_regs[i].dword[1],
                    xmm_regs[i].dword[2], xmm_regs[i].dword[3]);
            return (-1);
        }
    }

    if (var1 != 1.0 || var2 != 1.0 || var3 != 1.0)
    {
        printf ("***Error in x87 values_1b var1 %f var2 %f var3 %f\n", var1, var2, var3);
        return (-1);
    }

    printf ("app finished\n");
    fflush (stdout);
    return(0);
}

